---
title: "Replication Material 'Formal governance matters: When, how, and why states act on the IMF Executive Board'"
author: "Timon Forster, Dan Honig, and Alexandros Kentikelenis"
date: "November 2024"
output: html_notebook
---

# Introduction and set-up

This R Notebook provides the code to replicate all analyses in 'Formal governance matters: When, how, and why states act on the IMF Executive Board', published with the *Review of International Political Economy*. https://dx.doi.org/10.1080/09692290.2024.2441136


Before performing text pre-processing, inspecting descriptive statistics, and running our regression analyses, we load the relevant packages and set the directory to datasets.

```{r}
# Load packages
library(dplyr)
library(tidyr)
library(ggplot2)
library(patchwork)
library(clubSandwich)
library(stargazer)
library(corrr)
library(dplyr)
library(magrittr)
library(tidyr)
library(quanteda)
library(readtext)
library(readr)
library(stringr)
library(scales)
library(zoo)
library(ggiraphExtra)
library(viridis)
library(tidyverse)

# Set directory
# The replication folder can be supplied through the IMFEB_REPLICATION_DIR
# environment variable; when it is unset or empty, the original local path is
# used so that existing set-ups keep working unchanged.
dir_default <- "C:/Users/timon/Dropbox/Honig Kentikelenis/RIPE submission/Replication/"
dir_local <- Sys.getenv("IMFEB_REPLICATION_DIR", unset = dir_default)
if (!nzchar(dir_local)) {
  dir_local <- dir_default
}
```

```{r}
# define vector for G5 (country codes, in the order used for plots below)
vec_G5 <- c("USA", "DEU", "JPN", "FRA", "GBR")
# ordered factor of the G5 codes; the values have to be passed explicitly,
# because factor(levels = ...) on its own returns a zero-length factor
factor_G5 <- factor(vec_G5,
                    levels = c("USA", "DEU", "JPN", "FRA", "GBR"),
                    ordered = TRUE)
```

# 1. Text pre-processing

## Data preparation

First, let's load the text and meta data.

```{r}
# Speech-level data
dt_speeches <- readRDS(paste0(dir_local, "/IMFEBM_Speeches_Nov2024_V1.rds"))

# Meeting-level meta data, restricted to meetings that had a discussion and
# were not held in restricted session
dt_meta <- readRDS(paste0(dir_local, "/IMFEBM_Meeting_Nov2024_V1.rds"))
dt_meta <- filter(dt_meta, disc_discussion == "Y", disc_restricted == "N")
```


Second, we subset the text corpus to all speeches from state representatives. In doing so, we drop all information from documents, conclusions, or decisions. Further, we remove all observations with zero text (indicating attendance, but not participation). Finally, we drop duplicates --- which arise when speakers of the same constituency delivered or co-signed a statement. We also add the meta data from `dt_meta`.

```{r}
txt_raw <- dt_speeches %>%
  filter(
    # state representatives only
    sp_position %in% c("ED", "AED", "TAED", "SeniorAdvisor", "Advisor", "Assistant"),
    # exclude rows flagged as duplicates (c_dupl == 1); missing flags are kept
    is.na(c_dupl) | c_dupl == 0,
    # exclude rows without any text
    c_text != ""
  ) %>%
  # attach the meeting-level meta data
  left_join(dt_meta, by = c("meeting_id", "disc_date"))
```



## Text regularization 

Before converting the data into a text corpus, we manually remove 's because this sometimes does not get processed correctly. In addition, we remove all numbers at this stage; we found this to be more effective than removing purely numerical tokens because the OCR sometimes misrepresents these, e.g., '96amend'. This primarily serves to reduce the dimensionality (i.e., the number of types/unique words).

```{r}
# Strip "'s" first, then every digit, from the raw comment text
txt_clean <- txt_raw %>%
  mutate(
    c_text = c_text %>%
      str_remove_all(pattern = "'s") %>%
      str_remove_all(pattern = "[:digit:]")
  )
```


Subsequently, we convert the data into a text corpus, tokenize (while removing punctuation, symbols, separators, and splitting hyphens), lemmatize, and convert into a document-feature matrix.

```{r}
# Corpus: documents are identified by obs_id, text comes from c_text
txt_corp <- corpus(txt_clean, docid_field = "obs_id", text_field = "c_text")

# Tokenization: drop punctuation, symbols and separators; split hyphenated words
txt_tok <- tokens(
  txt_corp,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_separators = TRUE,
  split_hyphens = TRUE,
  include_docvars = TRUE
)

# Lemmatization: tokens matching column 2 of the tab-separated list are
# replaced by the corresponding entry in column 1
list_lemma <- read.csv(
  paste0(dir_local, "/lemmatization-en.txt"),
  header = FALSE,
  sep = "\t",
  as.is = TRUE,
  fileEncoding = "UTF-8-BOM"
)
txt_lemma <- tokens_replace(
  txt_tok,
  pattern = list_lemma[[2]],
  replacement = list_lemma[[1]],
  valuetype = "fixed",
  case_insensitive = TRUE
)

# Document-feature matrix of the lemmatized tokens, used to inspect the features
txt_dfm <- dfm(txt_lemma)
vec_features <- featnames(txt_dfm)
```


The text doesn’t process apostrophes and some numeric features remain. We identify and remove these in the following.

```{r}
# features that still contain "'s"
# (literal match; the previous pattern "*'s" began with a dangling quantifier,
# whose treatment depends on the regex engine)
vec_apostrophe <- grep("'s", vec_features, value = TRUE, fixed = TRUE)
# head(vec_apostrophe, n = 25)

# lookup table: `find` is the feature as it occurs, `rep` is the same feature
# with every "'s" removed
vec_apostrophe_replacement <- stringr::str_remove_all(vec_apostrophe, "'s")
dt_apostrophe <- tibble(rep = vec_apostrophe_replacement,
                        find = vec_apostrophe)

```


Now we can replace the apostrophes and remove the numeric features.

```{r}
# Replace the apostrophe features by their cleaned counterparts
txt_tok2 <- tokens_replace(
  txt_lemma,
  pattern = dt_apostrophe$find,
  replacement = dt_apostrophe$rep,
  valuetype = "fixed",
  case_insensitive = TRUE
)

# Run the lemma replacement again on the cleaned tokens
txt_lemma2 <- tokens_replace(
  txt_tok2,
  pattern = list_lemma[[2]],
  replacement = list_lemma[[1]],
  valuetype = "fixed",
  case_insensitive = TRUE
)

# Convert to a dfm (no further feature removal happens in this step; digits
# were already stripped from the raw text before tokenization)
txt_dfm2 <- dfm(txt_lemma2)
```



After these steps, the text corpus encompasses 35,913 features (unique tokens, i.e., types) and 71,359 documents (which corresponds to the number of individual comments).

```{r}
# overview of the dfm, followed by its number of features and documents
txt_dfm2
txt_dfm2 %>% nfeat()
txt_dfm2 %>% ndoc()
```


Due to some potential OCR errors, typos, etc. we also want to remove all features that occur less than 10 times in the overall corpus.

```{r}
# keep only features with a corpus-wide frequency of at least 10
txt_dfm3_final <- txt_dfm2 %>%
  dfm_trim(min_termfreq = 10)
```

```{r}
# total number of tokens after trimming rare features
txt_dfm3_final %>%
  ntoken() %>%
  sum()
```


We also want to remove stopwords for our baseline analysis; in robustness checks, we include the word counts with stopwords.

```{r}
# English stopword list shipped with quanteda
vec_stopwords <- quanteda::stopwords("en")
# head(vec_stopwords, n = 25)

# dfm without stopwords (baseline analysis)
txt_dfm_exsw <- txt_dfm3_final %>%
  dfm_remove(pattern = vec_stopwords)
```


```{r}
# total number of tokens once stopwords are removed
txt_dfm_exsw %>%
  ntoken() %>%
  sum()
```

We also store the meta-data of the speeches in a separate object.

```{r}
# speech-level meta data: everything in txt_clean except the text itself
dt_meta_speech <- select(txt_clean, -c_text)
```


# 2. Participation, coalition-building, preferences

Next, we measure state behavior in the IMF EB by computing three indicators:

* participation: the total count of words spoken by constituency (only G5)

* coalition-building: the number of meetings in which state reps of the G5 refer to each other or release joint statements

* preferences: the total number of times that G5 reps mention key terms related to market liberalization


## Participation

```{r}
# Token counts per comment, with and without stopwords.
# Rows follow the order of txt_clean, from which both dfms were built.
dt_ntoken <- tibble(
  obs_id = txt_clean[["obs_id"]],
  c_ntoken = ntoken(txt_dfm3_final),
  c_ntoken_exsw = ntoken(txt_dfm_exsw)
)
```


Now we need to aggregate this to the constituency level; we subset to the G5.

```{r}
# Participation in long format: one row per meeting and G5 speaker country
dt_participation <- dt_ntoken %>%
  # bring in the speech meta data (speaker country, meeting id, ...)
  left_join(dt_meta_speech, by = "obs_id") %>%
  # G5 speakers only
  filter(sp_ccode %in% vec_G5) %>%
  # sum the comment-level token counts within meeting x speaker country
  group_by(meeting_id, sp_ccode) %>%
  summarize(
    sp_ntoken = sum(c_ntoken, na.rm = TRUE),
    sp_ntoken_exsw = sum(c_ntoken_exsw, na.rm = TRUE),
    .groups = "drop"
  )
```



## Coalition-building

To examine coalition-building, we use a dictionary approach where the key terms are the names of speakers.

```{r}
# dt_meta_speech %>% 
#   # filter to G5
#   filter(sp_ccode %in% vec_G5) %>% 
#   # keep only unique combinations of individual name and constituency
#   select(sp_clean, sp_ccode) %>% 
#   unique() %>% 
#   # sort by sp_ccode and alphabetically
#   arrange(desc(sp_ccode), sp_clean) %>% 
#   # extract surname
#   mutate(sp_surname = str_extract(sp_clean, pattern = "[^,]+"))
```


```{r}
# Dictionary of speaker surnames, one key per G5 constituency (ref_<country>).
# The GBR entry "Newton" previously read "Newton, " (trailing comma and space);
# because punctuation is removed at tokenization, that entry could never match.
# NOTE(review): "Segal" is listed under both ref_USA and ref_GBR, so a mention
# counts towards both keys — confirm this is intended.
dict_COALITION <- dictionary(list(ref_USA = c("Abbott", "Ahmed", "Baukol", "Brettschneider", "Brukoff", "Budington", "Byrne", "Dohlman", "Donovan", "Douglass", "Doyle", "Epstein", "Franco", "Gruber", "Haarsager", "Hall", "Heath", "Hull", "Jacklin", "John", "Kaplan", "Kushlis", "Leichter", "Lin", "Lindquist", "Lissakers", "Loevinger", "Lundsager", "Malloy", "Mathiasen", "Medearis", "Melese", "Newman", "Norton", "Parodi", "Pollard", "Quarles", "Ralyea", "Ramdas", "Redifer", "Rediker", "Reeve", "Ryan", "Schneider", "Segal", "Sheets", "Sobel", "Veltri", "Weisman", "Weiss", "Weller", "Wire", "Wood"),
                                  ref_DEU = c("Beck", "Bischofberger", "Bleinroth", "Blome", "Brabender", "Brinkmann", "Chowdhury", "Dahlhaus", "Denk", "Donecker", "Engelen", "Eppendorfer", "Esdar", "Fabig", "Finken", "Gerdes", "Guennewich", "Harzer", "Haupt", "Heinbuecher", "Hillebrand", "Holler", "Kalwey", "Kaul", "Kilp", "Knirsch", "Kranen", "Landbeck", "Meissner", "Merz", "Meyerhoefer", "Pillath", "Reichenstein", "Rieck", "Schmalzriedt", "Schoenberg", "Schollmeier", "Sebhatu", "Seidler", "Stein", "Stenzel", "Temmeyer", "Wagenhoefer", "Wezel", "Wolff", "Ziegler", "Kleist", "Stenglin"),
                                  ref_JPN = c("Fujii", "Fukushima", "Fukuyama", "Furusawa", "Gobe", "Harada", "Haruki", "Hinata", "Hiroshima", "Hishikawa", "Imamura", "Ishida", "Kajikawa", "Kanada", "Kashiwagi", "Kihara", "Kitahara", "Kitamura", "Kitamura", "Kitamura", "Komatsuzaki", "Kotegawa", "Makino", "Masuhara", "Matsutani", "Mesaki", "Miyashita", "Miyoshi", "Momma", "Naka", "Nishizawa", "Nomura", "Nozaki", "Ogushi", "Ono", "Oya", "Saito", "Sekine", "Shimoda", "Shinagawa", "Shoji", "Tahara", "Takeda", "Takeuchi", "Takimura", "Toyama", "Watanabe", "Yagi", "Yamaguchi", "Yamaoka", "Yamasaki", "Yanai", "Yanase", "Yoshimura"),
                                  ref_FRA = c("Autheman", "Badirou", "Basdevant", "Bauche", "Blancher", "Boitreaud", "Boucher", "Cailleteau", "Claveranne", "Collange", "Couillault", "Cumenge", "Cuny", "Delepierre", "Desruelle", "Djoufelkit", "Ducrocq", "Dumont", "Duquesne", "Duriez", "Farnoux", "Fayolle", "Fernandez", "Fontaine", "Fremann", "Gitton", "Gudmundsson", "Guyon", "Jakubowicz", "Jourcin", "Lahreche", "Gal", "Leost", "Letilly", "Mateos", "Milleron", "Rebillard", "Requin", "Reynaud", "Robert", "Piegue", "Sirat", "Terracol", "Weber", "Villeroche"),
                                  ref_GBR = c("Austin", "Beer", "Bor", "Brooke", "Brownlee", "Burgess", "Cathcart", "Collins", "Coutanche", "Cowie", "Dagustun", "Droop", "Drummond", "Duggan", "Elder", "Evans", "Farrant", "Field", "Fisher", "Gibbs", "Glennerster", "Goldby", "Gordon", "Gregory", "Hagan", "Hauser", "Hills", "Joicey", "Kantor", "Kell", "Kelmanson", "Killen", "Meads", "Mellor", "Merotto", "Newton", "O'Donnell", "Orlik", "Perks", "Pickford", "Pillai", "Rab", "Riach", "Roaf", "Robinson", "Scholar", "Segal", "Shields", "Stuart", "Talbot", "Taylor", "Thornton", "Walsh", "Ward", "Wechsberg", "White", "Whyte", "Williams", "Yeates")))
```


However, we only want to apply the dictionary to the subset of lending programs, and the prepared comments of the G5. Thus, we need to slightly modify the text pre-processing. There is also no need for lowercasing here to increase the quality of the matches.

```{r}
# Keep prepared comments by G5 speakers in lending-program discussions
txt_coalition <- txt_clean %>%
  filter(
    sp_ccode %in% vec_G5,
    disc_topic == "Lending program",
    c_prep == 1
  )

# Corpus for the coalition analysis
txt_corp_coalition <- corpus(
  txt_coalition,
  docid_field = "obs_id",
  text_field = "c_text"
)

# Tokenization with the same options as for the main corpus
txt_tok_coalition <- tokens(
  txt_corp_coalition,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_separators = TRUE,
  split_hyphens = TRUE,
  include_docvars = TRUE
)
```
  

We now apply this dictionary to the cleaned tokens object.

```{r}
# Look up the speaker-name dictionary in the coalition tokens.
# Note: this overwrites the data frame `txt_coalition` with a tokens object.
# NOTE(review): matching is case-insensitive, so surnames that are also common
# words (e.g., "Hall", "Wood", "White") match their lowercase use as well —
# confirm this is intended.
txt_coalition <- tokens_lookup(
  txt_tok_coalition,
  dictionary = dict_COALITION,
  valuetype = "fixed",
  case_insensitive = TRUE
)

# Convert to a dfm and sum the counts within meeting x speaker constituency
dict_dfm_temp <- dfm(txt_coalition)
coalition_dfm <- dfm_group(
  dict_dfm_temp,
  groups = interaction(meeting_id, sp_ccode)
)
```


We convert the data to a dataframe, and summarize the word counts across all columns.

```{r}
# Coalition counts: for every speaker constituency, the number of meetings in
# which it referred at least once to a name from each G5 list
dt_coalition <- coalition_dfm %>%
  convert(to = "data.frame") %>%
  # doc_id is "<meeting_id>.<sp_ccode>"; split at the dot before the final
  # three-letter code
  separate(doc_id,
           into = c("meeting_id", "sp_ccode"),
           sep = "\\.(?=[A-Z]{3}$)",
           convert = TRUE) %>%
  group_by(sp_ccode) %>%
  # a meeting counts once, however many references it contains
  summarize(
    nmeeting_USA = sum(as.numeric(ref_usa >= 1), na.rm = TRUE),
    nmeeting_DEU = sum(as.numeric(ref_deu >= 1), na.rm = TRUE),
    nmeeting_JPN = sum(as.numeric(ref_jpn >= 1), na.rm = TRUE),
    nmeeting_FRA = sum(as.numeric(ref_fra >= 1), na.rm = TRUE),
    nmeeting_GBR = sum(as.numeric(ref_gbr >= 1), na.rm = TRUE)
  ) %>%
  ungroup()
```


This output (after setting the diagonals to zero) is saved separately as: 'EB_network_Nov2024.csv'.




## Preferences

To measure preferences for market liberalization, we define a simple dictionary (drawing on Kaya & Reay 2019).

```{r}
# Market-liberalization dictionary with two keys: INS and PRI.
# Entries are regular expressions; multi-word entries are separated by spaces.
dict_MKT <- dictionary(list(
  INS = c(
    "structural reforms?", "structural adjustment", "property rights?",
    "financial regulation", "regulation of financ(e|ial)", "infrastructur(e|al)",
    "good governance", "rule of law", "legal reforms?", "corruption"
  ),
  PRI = c(
    "privati[sz]e", "privati[sz]ation", "privati[sz]ing",
    "deregulate", "deregulation"
  )
))
```


We apply this dictionary to the cleaned tokens object.

```{r}
# Lemmatized tokens of G5 speakers only
txt_lemma_G5 <- tokens_subset(txt_lemma2, subset = sp_ccode %in% vec_G5)

# Apply the market-liberalization dictionary (regex, case-insensitive)
txt_dict <- tokens_lookup(
  txt_lemma_G5,
  dictionary = dict_MKT,
  valuetype = "regex",
  case_insensitive = TRUE
)

# Convert to a dfm and sum the counts within meeting x speaker constituency
dict_dfm_temp <- dfm(txt_dict)
dict_dfm <- dfm_group(
  dict_dfm_temp,
  groups = interaction(meeting_id, sp_ccode)
)
```


We convert the data to a dataframe and calculate our measure of preferences for market liberalization.

```{r}
# MKT preferences, long format: one row per meeting and speaker country
dt_dict_mkt <- dict_dfm %>%
  convert(to = "data.frame") %>%
  # doc_id is "<meeting_id>.<sp_ccode>"; split at the dot before the final
  # three-letter code
  separate(doc_id,
           into = c("meeting_id", "sp_ccode"),
           sep = "\\.(?=[A-Z]{3}$)",
           convert = TRUE) %>%
  # the preference measure is the sum of both dictionary keys
  transmute(meeting_id, sp_ccode, sp_prefMKT = ins + pri)

# MKT preferences, wide format: one column per speaker country
dt_dict_mkt_wide <- pivot_wider(
  dt_dict_mkt,
  names_from = "sp_ccode",
  names_prefix = "sp_prefMKT_",
  values_from = "sp_prefMKT"
)
```



# 3. Descriptive analysis

First, let's load the meeting meta data (if not already done above).

```{r}
# Meeting meta data, restricted to meetings that had a discussion and were not
# held in restricted session
dt_meta <- readRDS(paste0(dir_local, "/IMFEBM_Meeting_Nov2024_V1.rds"))
dt_meta <- filter(dt_meta, disc_discussion == "Y", disc_restricted == "N")
```



Second, we take the output from Section 2. We first expand the grid to allow for all possible meetings/levels of participation.

```{r}
# Full grid of meetings x G5 speaker countries, so that pairs without any
# speech are present as well (their text measures stay NA after the joins)
dt_gov_text <- expand_grid(meeting_id = dt_meta$meeting_id,
                           sp_ccode = vec_G5) %>%
  # drop repeated meeting x speaker pairs
  distinct() %>%
  # ordered speaker factor (for graphs)
  mutate(sp_factor = factor(sp_ccode,
                            levels = c("USA", "DEU", "JPN", "FRA", "GBR"),
                            ordered = TRUE)) %>%
  # meeting meta data
  left_join(dt_meta, by = "meeting_id") %>%
  # participation (token counts)
  left_join(dt_participation, by = c("meeting_id", "sp_ccode")) %>%
  # market-liberalization preferences
  left_join(dt_dict_mkt, by = c("meeting_id", "sp_ccode"))

# store the combined file for the descriptive and regression sections
saveRDS(dt_gov_text, file = paste0(dir_local, "/IMFEB_govtext_Nov24.rds"))
```



## Participation

```{r}
# read the combined text/meta file saved above
dt_gov_text <- paste0(dir_local, "/IMFEB_govtext_Nov24.rds") %>%
  readRDS()
```


Example France: For instance, in 2012 the French Executive Director intervened five times during a request for a three-year lending arrangement by the Central African Republic, with a total intervention length of 844 words (excluding stopwords) (IMF 2012).

```{r}
# France speaking on the Central African Republic's lending program in 2012
dt_gov_text %>%
  filter(
    sp_ccode == "FRA",
    disc_ccode == "CAF",
    disc_year == 2012,
    disc_topic == "Lending program"
  ) %>%
  select(sp_ccode, disc_ccode, disc_year, sp_ntoken_exsw)
```

Now we can create a box plot for each of the three groups: emerging market economies; LICs; and small countries.

```{r}
# Participation by country group. A meeting enters every group whose indicator
# equals 1, so the three subsets may overlap.
dt_EMMA <- dt_gov_text %>%
  filter(disc_EMMA_IMF == 1) %>%
  select(sp_ccode, sp_ntoken_exsw) %>%
  mutate(group_factor = "EMMA",
         group_factor_lbl = "Emerging economies")

dt_LICs <- dt_gov_text %>%
  filter(disc_LIC_WB == 1) %>%
  select(sp_ccode, sp_ntoken_exsw) %>%
  mutate(group_factor = "LICs",
         group_factor_lbl = "Low-income countries")

dt_small <- dt_gov_text %>%
  filter(disc_popsmall == 1) %>%
  select(sp_ccode, sp_ntoken_exsw) %>%
  mutate(group_factor = "Small",
         group_factor_lbl = "Small states")

# Stack the three groups and prepare the plotting factors
dt_boxplot <- bind_rows(dt_EMMA, dt_LICs, dt_small) %>%
  # number of non-missing observations behind each box
  group_by(sp_ccode, group_factor) %>%
  mutate(box_size = sum(!is.na(sp_ntoken_exsw))) %>%
  ungroup() %>%
  mutate(sp_ccode = factor(sp_ccode,
                           levels = c("USA", "DEU", "JPN", "FRA", "GBR"),
                           ordered = TRUE),
         # set the order through `levels`: with `labels` alone the values are
         # relabelled by alphabetical position, and the call fails when a
         # group has no rows
         group_factor_lbl = factor(group_factor_lbl,
                                   levels = c("Emerging economies",
                                              "Low-income countries",
                                              "Small states"),
                                   ordered = TRUE))
```


We also add a horizontal line for the global mean of the G5.

```{r}
# distribution of participation over the full G5 data (dt_gov_text)
dt_gov_text %>%
  pull(sp_ntoken_exsw) %>%
  summary()
```


```{r}
# Box plots of participation (words excl. stopwords) by speaker country, one
# panel per country group.
# The dashed reference line and the extra axis break at 354 are hard-coded;
# presumably the rounded mean printed by summary() above — re-check if the
# data change.
dt_boxplot %>%
  ggplot(aes(x = sp_ntoken_exsw, y = sp_ccode, 
             fill = group_factor_lbl, label = box_size)) +
  geom_vline(xintercept = 354, linetype = 2) +
  geom_boxplot(varwidth = TRUE,
               show.legend = FALSE) +
  # flip the axes so that word counts run along the vertical axis
  coord_flip() +
  facet_wrap(~ group_factor_lbl, nrow = 1, scales = "free_x") +
  theme_bw() +
  hrbrthemes::theme_ipsum_rc() +
  # "none" (lowercase) is the value ggplot2 recognizes for hiding the legend
  theme(legend.position = "none",
        axis.title.x = element_text(size = 14),
        axis.title.y = element_text(size = 14),
        strip.text = element_text(size = 14)
        ) +
  labs(x = "Participation: Number of words",
       y = "Speaker country") +
  scale_x_continuous(minor_breaks = seq(0, 2000, by = 500),
                     breaks = c(0, 354, 500, 1000, 1500, 2000),
                     labels = scales::comma) + 
  scale_fill_manual(values = c("#3B528BFF", "#5DC863FF", "#FDE725FF"))
# `units` is spelled out instead of relying on partial matching of `unit`
ggsave(filename = paste0(dir_local, "/participation_Nov24.png"),
       device = "png", height = 5, width = 8, units = "in", bg = "white")
```


## Coalition


### Pre-processing


```{r}
library(tidygraph)
library(ggraph)
```


First, let's load the data.

```{r}
# read the reference matrix; the first column supplies the row names
dt_network <- read.csv(
  paste0(dir_local, "/EB_network_Nov2024.csv"),
  header = TRUE,
  row.names = 1
)
```


From this data, we create a node and edges dataset.

```{r}
# Nodes: the five G5 chairs with numeric ids 1-5
net_nodes <- tibble(
  id = c(1:5),
  label = c("USA", "DEU", "JPN", "FRA", "GBR"),
  label_long = c("United States", "Germany", "Japan",
                 "France", "United Kingdom"),
  chair_received = c(734, 374, 180, 554, 676)
)

# Edges: reshape the matrix to long format (row = from, column = to, cell =
# weight) and translate the country codes into the node ids defined above;
# codes outside the five labels become NA
net_edges <- dt_network %>%
  rownames_to_column(var = "from") %>%
  pivot_longer(cols = -from,
               names_to = "to",
               values_to = "weight") %>%
  mutate(from = as.numeric(match(from, c("USA", "DEU", "JPN", "FRA", "GBR"))),
         to = as.numeric(match(to, c("USA", "DEU", "JPN", "FRA", "GBR"))))
```


Second, we combine these to create a tbl_graph object.


```{r}
# directed graph built from the node and edge tables, stored for the plot below
tidygraph_network_imf <- tbl_graph(
  nodes = net_nodes,
  edges = net_edges,
  directed = TRUE
)
saveRDS(tidygraph_network_imf,
        file = paste0(dir_local, "/EB_network_obj_Nov2024.rds"))
```


### Visualization

We load the network object.

```{r}
# read the stored network object
tidyg_G5 <- paste0(dir_local, "/EB_network_obj_Nov2024.rds") %>%
  readRDS()
```


```{r}
# Network plot of references among the G5 chairs: edge width follows `weight`,
# node size follows `chair_received`. The seed is set before the layout is drawn.
set.seed(800)
ggraph(tidyg_G5) +
  geom_edge_arc(arrow = arrow(type = "closed", length = unit(3, "mm")),
                aes(width = weight), # label = weight, 
                start_cap = circle(5, "mm"),
                end_cap = circle(5, "mm"),
                label_dodge = unit(2.5, "mm"),
                strength = 0.4,
                color = "#D3D3D3") + 
  geom_node_point(aes(size = chair_received * 5)) +
  geom_node_text(aes(label = label_long),
                 vjust = 2.2, size = 3) +
  labs(edge_width = "Number of meetings with references:") +
  theme_graph() + # theme_bw to check dimensions
  guides(size = "none",
         color = "none",
         label = "none",
         edge_color = "none") +
  scale_y_continuous(limits = c(-0.8, 0.8)) +
  theme(legend.key.height = unit(1, 'cm'),
        legend.key.width = unit(1, 'cm'),
        legend.position = "bottom")
# the trailing empty argument is removed and `units` is spelled out instead of
# relying on partial matching of `unit`
ggsave(filename = paste0(dir_local, "/coalition_G5_Nov24.png"),
       device = "png", height = 5, width = 8, units = "in")
```



Number of meetings in sample: 1,661 (lending programs)

```{r}
# number of distinct meetings in the coalition sample
# (txt_coalition is the tokens object created in Section 2; `$` reads its
# meeting_id document variable)
txt_coalition$meeting_id %>%
  n_distinct()
```

Number of joint statements

```{r}
# Sum c_joint over French and British comments: first within each meeting,
# then across meetings
txt_clean %>%
  filter(sp_ccode %in% c("FRA", "GBR")) %>%
  group_by(meeting_id) %>%
  summarize(c_joint = sum(c_joint, na.rm = TRUE), .groups = "drop") %>%
  summarize(total_joint = sum(c_joint, na.rm = TRUE))
```



## Market liberalization

We load the data and aggregate by the G5.

```{r}
# Meeting-level G5 aggregate: sum the market-liberalization counts of the five
# chairs and carry the country-group indicators along
dt_G5 <- paste0(dir_local, "/IMFEB_govtext_Nov24.rds") %>%
  readRDS() %>%
  # G5 speakers only
  filter(sp_ccode %in% vec_G5) %>%
  # one row per meeting
  group_by(meeting_id) %>%
  summarize(
    G5_prefMKT = sum(sp_prefMKT, na.rm = TRUE),
    disc_EMMA_IMF = max(disc_EMMA_IMF),
    disc_LIC_WB = max(disc_LIC_WB),
    disc_popsmall = max(disc_popsmall),
    # NOTE(review): disc_factor is simply the first disc_popsmall value of the
    # meeting — confirm this is the intended definition
    disc_factor = first(disc_popsmall),
    .groups = "drop"
  )
```


```{r}
# number of meetings and distribution of the G5 preference measure
dt_G5 %>%
  pull(meeting_id) %>%
  n_distinct()
dt_G5 %>%
  pull(G5_prefMKT) %>%
  summary()
```


```{r}
# G5 preferences by country group. A meeting enters every group whose
# indicator equals 1, so the three subsets may overlap.
dt_EMMA_G5 <- dt_G5 %>%
  filter(disc_EMMA_IMF == 1) %>%
  select(G5_prefMKT) %>%
  mutate(group_factor = "EMMA",
         group_factor_lbl = "Emerging economies")

dt_LICs_G5 <- dt_G5 %>%
  filter(disc_LIC_WB == 1) %>%
  select(G5_prefMKT) %>%
  mutate(group_factor = "LICs",
         group_factor_lbl = "Low-income countries")

dt_small_G5 <- dt_G5 %>%
  filter(disc_popsmall == 1) %>%
  select(G5_prefMKT) %>%
  mutate(group_factor = "Small",
         group_factor_lbl = "Small states")

# Stack the three groups and prepare the plotting factor
dt_boxplot_G5 <- bind_rows(dt_EMMA_G5, dt_LICs_G5, dt_small_G5) %>%
  # number of non-missing observations behind each box
  group_by(group_factor) %>%
  mutate(box_size = sum(!is.na(G5_prefMKT))) %>%
  ungroup() %>%
  # set the order through `levels`: with `labels` alone the values are
  # relabelled by alphabetical position, and the call fails when a group has
  # no rows
  mutate(group_factor_lbl = factor(group_factor_lbl,
                                   levels = c("Emerging economies",
                                              "Low-income countries",
                                              "Small states"),
                                   ordered = TRUE))
```


We add a horizontal line for the global mean of the G5.

```{r}
# distribution of the G5 preference measure over all meetings (dt_G5)
dt_G5 %>%
  pull(G5_prefMKT) %>%
  summary()
```

```{r}
# Box plots of the G5 preference measure, one panel per country group.
# The dashed reference line at 6.616 and the axis break at 6.6 are hard-coded;
# presumably the mean printed by summary() above — re-check if the data change.
dt_boxplot_G5 %>%
  ggplot(aes(x = G5_prefMKT, y = group_factor_lbl, 
             fill = group_factor_lbl, label = box_size)) +
  geom_vline(xintercept = 6.616, linetype = 2) +
  geom_boxplot(varwidth = TRUE,
               show.legend = FALSE) +
  # flip the axes so that the preference measure runs along the vertical axis
  coord_flip() +
  facet_wrap(~ group_factor_lbl, nrow = 1, scales = "free_x") +
  theme_bw() +
  hrbrthemes::theme_ipsum_rc() +
  # "none" (lowercase) is the value ggplot2 recognizes for hiding the legend
  theme(legend.position = "none",
        axis.title.x = element_text(size = 14),
        axis.title.y = element_text(size = 14),
        strip.text = element_text(size = 14)) +
  # NOTE(review): element_blank() is passed as an axis title and as scale
  # labels here; kept as is — confirm it renders as intended with the
  # installed ggplot2 version
  labs(x = "G5 preferences for market liberalization",
       y = element_blank()) +
  scale_x_continuous(minor_breaks = seq(0, 40, by = 5),
                     breaks = c(0, 6.6, 10, 20, 30, 40),
                     labels = scales::comma) +
  scale_y_discrete(labels = element_blank()) +
  scale_fill_manual(values = c("#3B528BFF", "#5DC863FF", "#FDE725FF"))
# `units` is spelled out instead of relying on partial matching of `unit`
ggsave(filename = paste0(dir_local, "/preferences_Nov24.png"),
       device = "png", height = 5, width = 8, units = "in", bg = "white")
```



# 4. Determinants of participation


## Data

In the first part of our analysis, we examine the determinants of participation. We first load the output file from the text analysis which includes our indicator of participation.

```{r}
# text-analysis output, restricted to G5 speaker countries
dt_gov_text <- readRDS(paste0(dir_local, "/IMFEB_govtext_Nov24.rds"))
dt_gov_text <- filter(dt_gov_text, sp_ccode %in% vec_G5)
```


Second, we load the controls. These include the following:

* `dytrade_log`: log dyadic trade between speaker-country and country-under-discussion (COW 4.0)

* `F0.netODA_ln`: dyadic aid between speaker-country and country-under-discussion, log net ODA (OECD ODA)

* `UNGA_affin`: UNGA voting affinity (version 28) between speaker-country and country-under-discussion

* `lngdppc_WDI`, `lngdp_WDI`, `res_import_WDI`, `debtservice_WDI`, `tottrade_USD_WDI`, `debt_USD_WDI`: GDP per capita (log), GDP (log), reserves (% of imports), debt service, total trade (USD) and external debt (USD) of the country-under-discussion (all WDI 2020)

* `cabal_WEO`, `govdebt_WEO`: current account deficits (% of GDP) and government gross debt (% of GDP) of the country-under-discussion (WEO 2021)

* `UNSC_member`: UN Security Council membership of country-under-discussion (Dreher et al. 2009)

* `libdem_VDem`: Liberal democracy index of the country-under-discussion (V-DEM, Coppedge et al. 2022)

* `lnclaims_BIS`: outstanding bank claims (log) of speaker-country in country-under-discussion (BIS)

* `n_comments`: Number of comments in a meeting (generated from `txt_raw` above)


```{r}
# control variables listed above
dt_deter_controls <- paste0(dir_local, "/dt_deterG5_controls_Nov24.rds") %>%
  readRDS()
```


We merge these two datasets based on `meeting_id` (which captures the country-under-discussion and time) and `sp_ccode` (the speaker-country). We also define FEs and we calculate the share of trade and outstanding debt, indicating the recipient's importance for global exchange and global debt. 

```{r}
# merge datasets
dt_deter <- dt_gov_text %>% 
  # add controls
  left_join(dt_deter_controls, by = c("meeting_id", "sp_ccode")) %>%
  # FEs
  mutate(sp_FE = as.factor(sp_ccode),
         disc_FE = as.factor(disc_ccode),
         year_FE = as.factor(disc_year)) %>% 
  # define other factors
  mutate(UNSC_member = as.factor(UNSC_member)) %>%
  # define dyad FE
  group_by(sp_ccode, disc_ccode) %>% 
  mutate(disc_sp_dyad = cur_group_id()) %>% 
  ungroup() %>% 
  mutate(dyad_FE = as.factor(disc_sp_dyad)) %>% 
  # shares trade and debt
  group_by(disc_year, sp_ccode) %>% 
  mutate(total_trade_USD_WDI = sum(tottrade_USD_WDI, na.rm = TRUE),
         total_debt_USD_WDI = sum(debt_USD_WDI, na.rm = TRUE)) %>% 
  ungroup() %>% 
  mutate(trade_shr_WDI = tottrade_USD_WDI / total_trade_USD_WDI,
         debt_shr_WDI = debt_USD_WDI / total_debt_USD_WDI)
```

```{r}
# store the estimation sample
saveRDS(dt_deter, file = paste0(dir_local, "/dt_deterG5_Nov24.rds"))
```


## Analysis

### Baseline

First load the data and define subsets (if not already done above).


```{r}
# full sample
dt_deter <- paste0(dir_local, "/dt_deterG5_Nov24.rds") %>%
  readRDS()

# subsets by discussion topic: lending programs and Article IV
dt_lending <- filter(dt_deter, disc_topic == "Lending program")
dt_article <- filter(dt_deter, disc_topic == "Article IV")
```


#### Model 1, Table 2: Determinants of participation: Regression analysis (no disc_country FEs)

```{r}
# Cluster-robust (CR2) standard errors, clustered on the dyad identifier
# disc_sp_dyad of the estimation sample
cluster_rse <- function(model) {
  sqrt(diag(vcovCR(model,
                   cluster = dt_deter$disc_sp_dyad,
                   type = "CR2")))
}

# baseline
poi_P1 <- glm(
  sp_ntoken_exsw ~
    dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI + UNSC_member +
    libdem_VDem + cabal_WEO + govdebt_WEO +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P1 <- cluster_rse(poi_P1)

# dyadic BIS
poi_P2 <- glm(
  sp_ntoken_exsw ~
    dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI + UNSC_member +
    libdem_VDem + cabal_WEO + govdebt_WEO +
    lnclaims_BIS +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P2 <- cluster_rse(poi_P2)

# additional econ controls
poi_P3 <- glm(
  sp_ntoken_exsw ~
    dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI + UNSC_member +
    libdem_VDem + cabal_WEO + govdebt_WEO +
    lngdp_WDI + trade_shr_WDI + debt_shr_WDI +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P3 <- cluster_rse(poi_P3)

# additional fundamentals
poi_P4 <- glm(
  sp_ntoken_exsw ~
    dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI + UNSC_member +
    libdem_VDem + cabal_WEO + govdebt_WEO +
    res_import_WDI + debtservice_WDI +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P4 <- cluster_rse(poi_P4)

# meeting level
poi_P5 <- glm(
  sp_ntoken_exsw ~
    dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI + UNSC_member +
    libdem_VDem + cabal_WEO + govdebt_WEO +
    n_comments +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P5 <- cluster_rse(poi_P5)
```


```{r}
# Regression table for the five models, reported with the clustered standard
# errors computed above. The covariate labels follow the order in which the
# coefficients first appear across the models.
list_models <- list(poi_P1, poi_P2, poi_P3, poi_P4, poi_P5)
stargazer(list_models,
          dep.var.labels = c("Words spoken (excl. stopwords)"),
          covariate.labels = c("Dyadic trade (log)", "Dyadic aid (log)", "UNGA affinity score", "GDP per capita (log)", "UNSC membership", "Liberal democracy", "Current account balance (% of GDP)", "General govt. gross debt (% of GDP)", "Bilateral banking claims (log)", "GDP (log)", "Trade (% of EMDEs)", "External debt (% of EMDEs)", "Reserves (% imports)", "Debt service", "Number of comments", "Constant"),
          # `omit` takes regular expressions, not globs: the plain names match
          # every fixed-effect term without a dangling leading "*"
          omit = c("year_FE", "disc_FE", "sp_FE", "dyad_FE"),
          se = list(rse_P1, rse_P2, rse_P3, rse_P4, rse_P5),
          title = "", font.size = "small",
          add.lines = list(
            c("Estimation", rep(c("Poisson"), each = 5)),
            c("Country-under-discussion FEs", rep(c("No"), 5)),
            c("Speaker Country FEs", rep(c("Yes"), 5)),
            c("Year FEs", rep("Yes", each = 5))),
        # type = "latex", out = "reg1_G5deter_spFE_Nov24.tex")
          type = "html", out = "reg1_G5deter_spFE_Nov24.doc")
```



Marginal effects

```{r}
# Standard deviation of logged dyadic trade (2.37621 when this was written).
dt_deter[["dytrade_log"]] %>% sd(na.rm = TRUE)
# Multiplicative change in the expected word count for a one-SD increase:
# exp(SD x coefficient), with the coefficient 0.056 entered by hand.
exp(2.37621 * 0.056)
```

```{r}
# Standard deviation of logged dyadic aid (1.763361 when this was written).
dt_deter[["F0.netODA_ln"]] %>% sd(na.rm = TRUE)
# Multiplicative change in the expected word count for a one-SD increase:
# exp(SD x coefficient), with the coefficient 0.022 entered by hand.
exp(1.763361 * 0.022)
```



#### Model 2, Table 3: Determinants of participation: Regression analysis (with disc_country FEs)

```{r}
# Five quasi-Poisson specifications of words spoken (stopwords excluded) with
# year, speaker-country and country-under-discussion fixed effects. For each
# model the reported standard errors are the square roots of the diagonal of a
# CR2 cluster-robust covariance matrix, clustered on disc_sp_dyad.

# (1) Baseline covariates.
poi_P1 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P1 <- poi_P1 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (2) Adds dyadic BIS banking claims: lnclaims_BIS.
poi_P2 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    lnclaims_BIS +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P2 <- poi_P2 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (3) Additional economic controls: lngdp_WDI, trade_shr_WDI, debt_shr_WDI.
poi_P3 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    lngdp_WDI + trade_shr_WDI + debt_shr_WDI +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P3 <- poi_P3 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (4) Additional fundamentals: res_import_WDI, debtservice_WDI.
poi_P4 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    res_import_WDI + debtservice_WDI +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P4 <- poi_P4 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (5) Meeting-level control: n_comments.
poi_P5 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    n_comments +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P5 <- poi_P5 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()
```


```{r}
# Collect the five participation models estimated with year, speaker-country
# and country-under-discussion fixed effects, and render them as one HTML
# table (written to a .doc file). Fixed-effect dummies are omitted from the
# output and the clustered standard errors replace the default ones.
# NOTE(review): the no-disc-FE version of this table labels the same outcome
# "Words spoken (excl. stopwords)"; consider aligning the label here.
list_models <- list(poi_P1, poi_P2, poi_P3, poi_P4, poi_P5)
stargazer(list_models,
          dep.var.labels = c("Words spoken"),
          covariate.labels = c("Dyadic trade (log)", "Dyadic aid (log)", "UNGA affinity score", "GDP per capita (log)", "UNSC membership", "Liberal democracy", "Current account balance (% of GDP)", "General govt. gross debt (% of GDP)", "Bilateral banking claims (log)", "GDP (log)", "Trade (% of EMDEs)", "External debt (% of EMDEs)", "Reserves (% imports)", "Debt service", "Number of comments", "Constant"),
          omit = c("*year_FE*", "*disc_FE*", "*sp_FE*", "*dyad_FE*"),
          se = list(rse_P1, rse_P2, rse_P3, rse_P4, rse_P5),
          title = "", font.size = "small",
          # Footer rows. Every model in this table includes disc_FE, so the
          # country-under-discussion row appears once and reads "Yes"; a
          # second, contradictory "No" row with the same label was removed.
          add.lines = list(
            c("Estimation", rep(c("Poisson"), each = 5)),
            c("Country-under-discussion FEs", rep(c("Yes"), 5)),
            c("Speaker Country FEs", rep(c("Yes"), 5)),
            c("Year FEs", rep("Yes", each = 5))),
        # type = "latex", out = "reg2_G5deter_discFEspFE_Nov24.tex")
          type = "html", out = "reg2_G5deter_discFEspFE_Nov24.doc")
```




Marginal effects

```{r}
# Standard deviation of logged dyadic trade (2.37621 when this was written).
dt_deter[["dytrade_log"]] %>% sd(na.rm = TRUE)
# Multiplicative change in the expected word count for a one-SD increase:
# exp(SD x coefficient), with the coefficient 0.030 entered by hand.
exp(2.37621 * 0.030)
```

```{r}
# Standard deviation of logged dyadic aid (1.763361 when this was written).
dt_deter[["F0.netODA_ln"]] %>% sd(na.rm = TRUE)
# Multiplicative change in the expected word count for a one-SD increase:
# exp(SD x coefficient), with the coefficient 0.020 entered by hand.
exp(1.763361 * 0.020)
```


### Robustness

#### With stopwords

Table A1: Determinants of participation: including stopwords (no disc_country FEs)

```{r}
# Robustness: same five specifications, but the outcome is sp_ntoken (word
# count including stopwords). Year and speaker-country fixed effects only.
# Standard errors are the square roots of the diagonal of a CR2
# cluster-robust covariance matrix, clustered on disc_sp_dyad.

# (1) Baseline covariates.
poi_P1 <- glm(
  sp_ntoken ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P1 <- poi_P1 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (2) Adds dyadic BIS banking claims: lnclaims_BIS.
poi_P2 <- glm(
  sp_ntoken ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    lnclaims_BIS +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P2 <- poi_P2 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (3) Additional economic controls: lngdp_WDI, trade_shr_WDI, debt_shr_WDI.
poi_P3 <- glm(
  sp_ntoken ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    lngdp_WDI + trade_shr_WDI + debt_shr_WDI +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P3 <- poi_P3 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (4) Additional fundamentals: res_import_WDI, debtservice_WDI.
poi_P4 <- glm(
  sp_ntoken ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    res_import_WDI + debtservice_WDI +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P4 <- poi_P4 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (5) Meeting-level control: n_comments.
poi_P5 <- glm(
  sp_ntoken ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    n_comments +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P5 <- poi_P5 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()
```


```{r}
# Render the five stopword-inclusive models (no country-under-discussion FEs)
# as one HTML table written to a .doc file. Fixed-effect dummies are omitted
# and the clustered standard errors replace the default ones.
list_models <- list(poi_P1, poi_P2, poi_P3, poi_P4, poi_P5)
stargazer(
  list_models,
  dep.var.labels = "Words spoken (incl. stopwords)",
  covariate.labels = c(
    "Dyadic trade (log)", "Dyadic aid (log)", "UNGA affinity score",
    "GDP per capita (log)", "UNSC membership", "Liberal democracy",
    "Current account balance (% of GDP)",
    "General govt. gross debt (% of GDP)",
    "Bilateral banking claims (log)", "GDP (log)", "Trade (% of EMDEs)",
    "External debt (% of EMDEs)", "Reserves (% imports)", "Debt service",
    "Number of comments", "Constant"
  ),
  omit = c("*year_FE*", "*disc_FE*", "*sp_FE*", "*dyad_FE*"),
  se = list(rse_P1, rse_P2, rse_P3, rse_P4, rse_P5),
  title = "",
  font.size = "small",
  # Footer rows describing estimator and fixed effects for the five columns.
  add.lines = list(
    c("Estimation", rep("Poisson", 5)),
    c("Country-under-discussion FEs", rep("No", 5)),
    c("Speaker Country FEs", rep("Yes", 5)),
    c("Year FEs", rep("Yes", 5))
  ),
  # LaTeX alternative: type = "latex", out = "rob1_G5deter_spFE_Nov24.tex"
  type = "html",
  out = "rob1_G5deter_spFE_Nov24.doc"
)
```



Table A2: Determinants of participation: including stopwords (with disc_country FEs)

```{r}
# Robustness: outcome is sp_ntoken (word count including stopwords), with
# year, speaker-country and country-under-discussion fixed effects.
# Standard errors are the square roots of the diagonal of a CR2
# cluster-robust covariance matrix, clustered on disc_sp_dyad.

# (1) Baseline covariates.
poi_P1 <- glm(
  sp_ntoken ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P1 <- poi_P1 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (2) Adds dyadic BIS banking claims: lnclaims_BIS.
poi_P2 <- glm(
  sp_ntoken ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    lnclaims_BIS +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P2 <- poi_P2 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (3) Additional economic controls: lngdp_WDI, trade_shr_WDI, debt_shr_WDI.
poi_P3 <- glm(
  sp_ntoken ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    lngdp_WDI + trade_shr_WDI + debt_shr_WDI +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P3 <- poi_P3 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (4) Additional fundamentals: res_import_WDI, debtservice_WDI.
poi_P4 <- glm(
  sp_ntoken ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    res_import_WDI + debtservice_WDI +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P4 <- poi_P4 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (5) Meeting-level control: n_comments.
poi_P5 <- glm(
  sp_ntoken ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    n_comments +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_deter
)
rse_P5 <- poi_P5 %>%
  vcovCR(cluster = dt_deter$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()
```


```{r}
# Render the five stopword-inclusive models (with country-under-discussion
# FEs) as one HTML table written to a .doc file. Fixed-effect dummies are
# omitted and the clustered standard errors replace the default ones.
list_models <- list(poi_P1, poi_P2, poi_P3, poi_P4, poi_P5)
stargazer(
  list_models,
  dep.var.labels = "Words spoken (incl. stopwords)",
  covariate.labels = c(
    "Dyadic trade (log)", "Dyadic aid (log)", "UNGA affinity score",
    "GDP per capita (log)", "UNSC membership", "Liberal democracy",
    "Current account balance (% of GDP)",
    "General govt. gross debt (% of GDP)",
    "Bilateral banking claims (log)", "GDP (log)", "Trade (% of EMDEs)",
    "External debt (% of EMDEs)", "Reserves (% imports)", "Debt service",
    "Number of comments", "Constant"
  ),
  omit = c("*year_FE*", "*disc_FE*", "*sp_FE*", "*dyad_FE*"),
  se = list(rse_P1, rse_P2, rse_P3, rse_P4, rse_P5),
  title = "",
  font.size = "small",
  # Footer rows describing estimator and fixed effects for the five columns.
  add.lines = list(
    c("Estimation", rep("Poisson", 5)),
    c("Country-under-discussion FEs", rep("Yes", 5)),
    c("Speaker Country FEs", rep("Yes", 5)),
    c("Year FEs", rep("Yes", 5))
  ),
  # LaTeX alternative: type = "latex", out = "rob2_G5deter_discFEspFE_Nov24.tex"
  type = "html",
  out = "rob2_G5deter_discFEspFE_Nov24.doc"
)
```


#### Lending programs only

Table A3: Determinants of participation: Lending programs (no disc_country FEs)

```{r}
# Robustness: the five specifications re-estimated on dt_lending (the
# lending-program sample), outcome sp_ntoken_exsw, with year and
# speaker-country fixed effects. Standard errors are the square roots of the
# diagonal of a CR2 cluster-robust covariance matrix, clustered on
# disc_sp_dyad.

# (1) Baseline covariates.
poi_P1 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_lending
)
rse_P1 <- poi_P1 %>%
  vcovCR(cluster = dt_lending$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (2) Adds dyadic BIS banking claims: lnclaims_BIS.
poi_P2 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    lnclaims_BIS +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_lending
)
rse_P2 <- poi_P2 %>%
  vcovCR(cluster = dt_lending$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (3) Additional economic controls: lngdp_WDI, trade_shr_WDI, debt_shr_WDI.
poi_P3 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    lngdp_WDI + trade_shr_WDI + debt_shr_WDI +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_lending
)
rse_P3 <- poi_P3 %>%
  vcovCR(cluster = dt_lending$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (4) Additional fundamentals: res_import_WDI, debtservice_WDI.
poi_P4 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    res_import_WDI + debtservice_WDI +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_lending
)
rse_P4 <- poi_P4 %>%
  vcovCR(cluster = dt_lending$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (5) Meeting-level control: n_comments.
poi_P5 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    n_comments +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_lending
)
rse_P5 <- poi_P5 %>%
  vcovCR(cluster = dt_lending$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()
```


```{r}
# Render the five lending-program models (no country-under-discussion FEs)
# as one HTML table written to a .doc file. Fixed-effect dummies are omitted
# and the clustered standard errors replace the default ones.
list_models <- list(poi_P1, poi_P2, poi_P3, poi_P4, poi_P5)
stargazer(
  list_models,
  dep.var.labels = "Words spoken (excl. stopwords)",
  covariate.labels = c(
    "Dyadic trade (log)", "Dyadic aid (log)", "UNGA affinity score",
    "GDP per capita (log)", "UNSC membership", "Liberal democracy",
    "Current account balance (% of GDP)",
    "General govt. gross debt (% of GDP)",
    "Bilateral banking claims (log)", "GDP (log)", "Trade (% of EMDEs)",
    "External debt (% of EMDEs)", "Reserves (% imports)", "Debt service",
    "Number of comments", "Constant"
  ),
  omit = c("*year_FE*", "*disc_FE*", "*sp_FE*", "*dyad_FE*"),
  se = list(rse_P1, rse_P2, rse_P3, rse_P4, rse_P5),
  title = "",
  font.size = "small",
  # Footer rows describing estimator, sample and fixed effects.
  add.lines = list(
    c("Estimation", rep("Poisson", 5)),
    c("Sample", rep("Lending programs", 5)),
    c("Country-under-discussion FEs", rep("No", 5)),
    c("Speaker Country FEs", rep("Yes", 5)),
    c("Year FEs", rep("Yes", 5))
  ),
  # LaTeX alternative: type = "latex", out = "rob3_G5deter_spFE_Nov24.tex"
  type = "html",
  out = "rob3_G5deter_spFE_Nov24.doc"
)
```


Table A4: Determinants of participation: Lending programs (with disc_country FEs)

```{r}
# Robustness: the five specifications re-estimated on dt_lending (the
# lending-program sample), outcome sp_ntoken_exsw, with year,
# speaker-country and country-under-discussion fixed effects. Standard errors
# are the square roots of the diagonal of a CR2 cluster-robust covariance
# matrix, clustered on disc_sp_dyad.

# (1) Baseline covariates.
poi_P1 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_lending
)
rse_P1 <- poi_P1 %>%
  vcovCR(cluster = dt_lending$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (2) Adds dyadic BIS banking claims: lnclaims_BIS.
poi_P2 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    lnclaims_BIS +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_lending
)
rse_P2 <- poi_P2 %>%
  vcovCR(cluster = dt_lending$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (3) Additional economic controls: lngdp_WDI, trade_shr_WDI, debt_shr_WDI.
poi_P3 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    lngdp_WDI + trade_shr_WDI + debt_shr_WDI +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_lending
)
rse_P3 <- poi_P3 %>%
  vcovCR(cluster = dt_lending$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (4) Additional fundamentals: res_import_WDI, debtservice_WDI.
poi_P4 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    res_import_WDI + debtservice_WDI +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_lending
)
rse_P4 <- poi_P4 %>%
  vcovCR(cluster = dt_lending$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (5) Meeting-level control: n_comments.
poi_P5 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    n_comments +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_lending
)
rse_P5 <- poi_P5 %>%
  vcovCR(cluster = dt_lending$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()
```


```{r}
# Render the five lending-program models (with country-under-discussion FEs)
# as one HTML table written to a .doc file. Fixed-effect dummies are omitted
# and the clustered standard errors replace the default ones.
list_models <- list(poi_P1, poi_P2, poi_P3, poi_P4, poi_P5)
stargazer(
  list_models,
  dep.var.labels = "Words spoken (excl. stopwords)",
  covariate.labels = c(
    "Dyadic trade (log)", "Dyadic aid (log)", "UNGA affinity score",
    "GDP per capita (log)", "UNSC membership", "Liberal democracy",
    "Current account balance (% of GDP)",
    "General govt. gross debt (% of GDP)",
    "Bilateral banking claims (log)", "GDP (log)", "Trade (% of EMDEs)",
    "External debt (% of EMDEs)", "Reserves (% imports)", "Debt service",
    "Number of comments", "Constant"
  ),
  omit = c("*year_FE*", "*disc_FE*", "*sp_FE*", "*dyad_FE*"),
  se = list(rse_P1, rse_P2, rse_P3, rse_P4, rse_P5),
  title = "",
  font.size = "small",
  # Footer rows describing estimator, sample and fixed effects.
  add.lines = list(
    c("Estimation", rep("Poisson", 5)),
    c("Sample", rep("Lending programs", 5)),
    c("Country-under-discussion FEs", rep("Yes", 5)),
    c("Speaker Country FEs", rep("Yes", 5)),
    c("Year FEs", rep("Yes", 5))
  ),
  # LaTeX alternative: type = "latex", out = "rob4_G5deter_discFEspFE_Nov24.tex"
  type = "html",
  out = "rob4_G5deter_discFEspFE_Nov24.doc"
)
```



#### Article IVs (exsw)

Table A5: Determinants of participation: Article IVs (no disc_country FEs)

```{r}
# Robustness: the five specifications re-estimated on dt_article (the
# Article IV sample), outcome sp_ntoken_exsw, with year and speaker-country
# fixed effects. Standard errors are the square roots of the diagonal of a
# CR2 cluster-robust covariance matrix, clustered on disc_sp_dyad.

# (1) Baseline covariates.
poi_P1 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_article
)
rse_P1 <- poi_P1 %>%
  vcovCR(cluster = dt_article$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (2) Adds dyadic BIS banking claims: lnclaims_BIS.
poi_P2 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    lnclaims_BIS +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_article
)
rse_P2 <- poi_P2 %>%
  vcovCR(cluster = dt_article$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (3) Additional economic controls: lngdp_WDI, trade_shr_WDI, debt_shr_WDI.
poi_P3 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    lngdp_WDI + trade_shr_WDI + debt_shr_WDI +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_article
)
rse_P3 <- poi_P3 %>%
  vcovCR(cluster = dt_article$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (4) Additional fundamentals: res_import_WDI, debtservice_WDI.
poi_P4 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    res_import_WDI + debtservice_WDI +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_article
)
rse_P4 <- poi_P4 %>%
  vcovCR(cluster = dt_article$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (5) Meeting-level control: n_comments.
poi_P5 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    n_comments +
    year_FE + sp_FE,
  family = quasipoisson(link = "log"),
  data = dt_article
)
rse_P5 <- poi_P5 %>%
  vcovCR(cluster = dt_article$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()
```


```{r}
# Render the five Article IV models (no country-under-discussion FEs) as one
# HTML table written to a .doc file. Fixed-effect dummies are omitted and the
# clustered standard errors replace the default ones.
list_models <- list(poi_P1, poi_P2, poi_P3, poi_P4, poi_P5)
stargazer(
  list_models,
  dep.var.labels = "Words spoken (excl. stopwords)",
  covariate.labels = c(
    "Dyadic trade (log)", "Dyadic aid (log)", "UNGA affinity score",
    "GDP per capita (log)", "UNSC membership", "Liberal democracy",
    "Current account balance (% of GDP)",
    "General govt. gross debt (% of GDP)",
    "Bilateral banking claims (log)", "GDP (log)", "Trade (% of EMDEs)",
    "External debt (% of EMDEs)", "Reserves (% imports)", "Debt service",
    "Number of comments", "Constant"
  ),
  omit = c("*year_FE*", "*disc_FE*", "*sp_FE*", "*dyad_FE*"),
  se = list(rse_P1, rse_P2, rse_P3, rse_P4, rse_P5),
  title = "",
  font.size = "small",
  # Footer rows describing estimator, sample and fixed effects.
  add.lines = list(
    c("Estimation", rep("Poisson", 5)),
    c("Sample", rep("Article IVs", 5)),
    c("Country-under-discussion FEs", rep("No", 5)),
    c("Speaker Country FEs", rep("Yes", 5)),
    c("Year FEs", rep("Yes", 5))
  ),
  # LaTeX alternative: type = "latex", out = "rob5_G5deter_spFE_Nov24.tex"
  type = "html",
  out = "rob5_G5deter_spFE_Nov24.doc"
)
```


Table A6: Determinants of participation: Article IVs (with disc_country FEs)

```{r}
# Robustness: the five specifications re-estimated on dt_article (the
# Article IV sample), outcome sp_ntoken_exsw, with year, speaker-country and
# country-under-discussion fixed effects. Standard errors are the square
# roots of the diagonal of a CR2 cluster-robust covariance matrix, clustered
# on disc_sp_dyad.

# (1) Baseline covariates.
poi_P1 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_article
)
rse_P1 <- poi_P1 %>%
  vcovCR(cluster = dt_article$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (2) Adds dyadic BIS banking claims: lnclaims_BIS.
poi_P2 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    lnclaims_BIS +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_article
)
rse_P2 <- poi_P2 %>%
  vcovCR(cluster = dt_article$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (3) Additional economic controls: lngdp_WDI, trade_shr_WDI, debt_shr_WDI.
poi_P3 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    lngdp_WDI + trade_shr_WDI + debt_shr_WDI +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_article
)
rse_P3 <- poi_P3 %>%
  vcovCR(cluster = dt_article$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (4) Additional fundamentals: res_import_WDI, debtservice_WDI.
poi_P4 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    res_import_WDI + debtservice_WDI +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_article
)
rse_P4 <- poi_P4 %>%
  vcovCR(cluster = dt_article$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()

# (5) Meeting-level control: n_comments.
poi_P5 <- glm(
  sp_ntoken_exsw ~ dytrade_log + F0.netODA_ln + UNGA_affin + lngdppc_WDI +
    UNSC_member + libdem_VDem + cabal_WEO + govdebt_WEO +
    n_comments +
    year_FE + sp_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_article
)
rse_P5 <- poi_P5 %>%
  vcovCR(cluster = dt_article$disc_sp_dyad, type = "CR2") %>%
  diag() %>%
  sqrt()
```


```{r}
# Render the five Article IV models (with country-under-discussion FEs) as
# one HTML table written to a .doc file. Fixed-effect dummies are omitted and
# the clustered standard errors replace the default ones.
list_models <- list(poi_P1, poi_P2, poi_P3, poi_P4, poi_P5)
stargazer(
  list_models,
  dep.var.labels = "Words spoken (excl. stopwords)",
  covariate.labels = c(
    "Dyadic trade (log)", "Dyadic aid (log)", "UNGA affinity score",
    "GDP per capita (log)", "UNSC membership", "Liberal democracy",
    "Current account balance (% of GDP)",
    "General govt. gross debt (% of GDP)",
    "Bilateral banking claims (log)", "GDP (log)", "Trade (% of EMDEs)",
    "External debt (% of EMDEs)", "Reserves (% imports)", "Debt service",
    "Number of comments", "Constant"
  ),
  omit = c("*year_FE*", "*disc_FE*", "*sp_FE*", "*dyad_FE*"),
  se = list(rse_P1, rse_P2, rse_P3, rse_P4, rse_P5),
  title = "",
  font.size = "small",
  # Footer rows describing estimator, sample and fixed effects.
  add.lines = list(
    c("Estimation", rep("Poisson", 5)),
    c("Sample", rep("Article IVs", 5)),
    c("Country-under-discussion FEs", rep("Yes", 5)),
    c("Speaker Country FEs", rep("Yes", 5)),
    c("Year FEs", rep("Yes", 5))
  ),
  # LaTeX alternative: type = "latex", out = "rob6_G5deter_discFEspFE_Nov24.tex"
  type = "html",
  out = "rob6_G5deter_discFEspFE_Nov24.doc"
)
```





# 5. Predicting conditionality

## Data

Load the output text data (if not already done above).

```{r}
# Read the processed speech-level text data from the replication directory.
dt_gov_text <- dir_local %>%
  paste0("/IMFEB_govtext_Nov24.rds") %>%
  readRDS()
```


We aggregate the indicators to the yearly level for the G5 as a collective actor, and we only infer market-liberalizing preferences from meetings on lending programs.

```{r}
# Collapse speeches by G5 speakers in lending-program meetings to one row per
# country-under-discussion and year. G5_prefMKT sums sp_prefMKT and G5_ntoken
# sums the stopword-free token counts; missing values are ignored in both
# sums. The result is returned ungrouped.
dt_pref <- dt_gov_text %>%
  filter(
    sp_ccode %in% vec_G5,
    disc_topic == "Lending program"
  ) %>%
  group_by(
    disc_country, disc_ccode, disc_ccode_COW,
    disc_cname_COW, disc_ncode_COW, disc_year
  ) %>%
  summarise(
    G5_prefMKT = sum(sp_prefMKT, na.rm = TRUE),
    G5_ntoken = sum(sp_ntoken_exsw, na.rm = TRUE),
    .groups = "drop"
  )
```


We create a meta dataset with country codes.

```{r}
# Lookup table of the unique country identifier combinations found in dt_pref.
meta_country <- distinct(
  dt_pref,
  disc_country, disc_ccode, disc_ccode_COW, disc_cname_COW, disc_ncode_COW
)
```


In addition, we load the dataset with the dependent variable (conditionality data) and the following control variables, all measured for the country-under-discussion:

* `BA1MKT`: number of market-liberalizing reforms (Kentikelenis & Stubbs 2023)

* `lngdp_WDI`, `gdpgrowth_WDI`, `res_import_WDI`, `debtservice_WDI`, `tradegdp_WDI`: GDP (log), GDP growth (%), reserves (% of imports), debt service, trade (% of GDP) (all WDI 2020)

* `cabal_WEO`: current account balance (% of GDP) (WEO 2021)

* `elec_VDem`, `libdem_VDem`: dummy variable for upcoming elections (legislative or executive) and Liberal democracy index of the country-under-discussion (V-DEM, Coppedge et al. 2022)

* `UNSC_member`: UN Security Council membership of country-under-discussion (Dreher et al. 2009)

* `regqual_WGI`: regulatory quality of country-under-discussion (Kaufmann et al., 2010)

* `fi_index_pd`: level of economic freedom of country-under-discussion (Dahlberg et al., 2024)

* `L1.cumIMF55`: cumulative years under IMF program since 1980 (Kentikelenis & Stubbs 2023)

```{r}
# Read the conditionality outcome and control variables from the replication
# directory.
dt_pred_controls <- dir_local %>%
  paste0("/dt_predG5_controls_Nov24.rds") %>%
  readRDS()
```


Let's merge these two together.

```{r}
# Build the estimation data in three steps.
# Step 1: attach the country identifiers to the controls via disc_country.
dt_pred <- left_join(dt_pred_controls, meta_country, by = "disc_country")

# Step 2: attach the G5 preference and word-count aggregates. The controls'
# L1.cond_year is matched against the meeting year (disc_year) in dt_pref,
# together with all country identifiers.
# NOTE(review): L1.cond_year presumably holds cond_year lagged by one year --
# confirm against the construction of dt_pred_controls.
dt_pred <- left_join(
  dt_pred,
  dt_pref,
  by = c(
    "L1.cond_year" = "disc_year",
    "disc_country", "disc_ccode",
    "disc_ccode_COW", "disc_ncode_COW",
    "disc_cname_COW"
  )
)

# Step 3: factor versions of country code and conditionality year, used as
# fixed effects in the regressions.
dt_pred <- mutate(
  dt_pred,
  disc_FE = as.factor(disc_ccode),
  year_FE = as.factor(cond_year)
)
```


## Analysis

Alternatively, load the file directly.

```{r}
# Read the pre-built estimation dataset from the replication directory.
dt_pred <- dir_local %>%
  paste0("/dt_predG5_Nov24.rds") %>%
  readRDS()
```



### Table 4: Conditionality and preferences for market liberalization: Regression analysis

With year FEs only


```{r}
# Five quasi-Poisson specifications of the number of market-liberalizing
# reforms (BA1MKT) on G5 market-liberalization preferences, with year fixed
# effects only. Standard errors are the square roots of the diagonal of a CR2
# cluster-robust covariance matrix, clustered on disc_ccode.

# (1) Baseline covariates.
mkt_poi1 <- glm(
  BA1MKT ~ G5_prefMKT + lngdp_WDI + elec_VDem + libdem_VDem +
    gdpgrowth_WDI + cabal_WEO + L1.cumIMF55 + regqual_WGI +
    year_FE,
  family = quasipoisson(link = "log"),
  data = dt_pred
)
mkt_rse1 <- mkt_poi1 %>%
  vcovCR(cluster = dt_pred$disc_ccode, type = "CR2") %>%
  diag() %>%
  sqrt()

# (2) Adds the G5 word count: G5_ntoken.
mkt_poi2 <- glm(
  BA1MKT ~ G5_prefMKT + lngdp_WDI + elec_VDem + libdem_VDem +
    gdpgrowth_WDI + cabal_WEO + L1.cumIMF55 + regqual_WGI +
    G5_ntoken +
    year_FE,
  family = quasipoisson(link = "log"),
  data = dt_pred
)
mkt_rse2 <- mkt_poi2 %>%
  vcovCR(cluster = dt_pred$disc_ccode, type = "CR2") %>%
  diag() %>%
  sqrt()

# (3) Adds UN Security Council membership: UNSC_member.
mkt_poi3 <- glm(
  BA1MKT ~ G5_prefMKT + lngdp_WDI + elec_VDem + libdem_VDem +
    gdpgrowth_WDI + cabal_WEO + L1.cumIMF55 + regqual_WGI +
    UNSC_member +
    year_FE,
  family = quasipoisson(link = "log"),
  data = dt_pred
)
mkt_rse3 <- mkt_poi3 %>%
  vcovCR(cluster = dt_pred$disc_ccode, type = "CR2") %>%
  diag() %>%
  sqrt()

# (4) Adds reserves and debt service: res_import_WDI, debtservice_WDI.
mkt_poi4 <- glm(
  BA1MKT ~ G5_prefMKT + lngdp_WDI + elec_VDem + libdem_VDem +
    gdpgrowth_WDI + cabal_WEO + L1.cumIMF55 + regqual_WGI +
    res_import_WDI + debtservice_WDI +
    year_FE,
  family = quasipoisson(link = "log"),
  data = dt_pred
)
mkt_rse4 <- mkt_poi4 %>%
  vcovCR(cluster = dt_pred$disc_ccode, type = "CR2") %>%
  diag() %>%
  sqrt()

# (5) Adds trade openness and economic freedom: tradegdp_WDI, fi_index_pd.
mkt_poi5 <- glm(
  BA1MKT ~ G5_prefMKT + lngdp_WDI + elec_VDem + libdem_VDem +
    gdpgrowth_WDI + cabal_WEO + L1.cumIMF55 + regqual_WGI +
    tradegdp_WDI + fi_index_pd +
    year_FE,
  family = quasipoisson(link = "log"),
  data = dt_pred
)
mkt_rse5 <- mkt_poi5 %>%
  vcovCR(cluster = dt_pred$disc_ccode, type = "CR2") %>%
  diag() %>%
  sqrt()
```

```{r}
# Render the five year-FE conditionality models as one HTML table written to
# a .doc file. Fixed-effect dummies are omitted and the clustered standard
# errors replace the default ones.
stargazer(
  mkt_poi1, mkt_poi2, mkt_poi3, mkt_poi4, mkt_poi5,
  dep.var.labels = "Market-liberalization reforms",
  covariate.labels = c(
    "MKT preferences (t-1)", "GDP (log)", "Elections", "Liberal democracy",
    "GDP growth", "Current account balance (% of GDP)", "Past IMF programs",
    "Regulatory quality", "Words spoken (G5)", "UNSC membership",
    "Reserves (% imports)", "Debt service", "Trade (% GDP)",
    "Economic Freedom Index", "Constant"
  ),
  omit = c("*year_FE*", "*disc_FE*"),
  se = list(mkt_rse1, mkt_rse2, mkt_rse3, mkt_rse4, mkt_rse5),
  title = "",
  font.size = "small",
  # Footer rows describing the fixed effects for the five columns.
  add.lines = list(
    c("Country FEs", rep("No", 5)),
    c("Year FEs", rep("Yes", 5))
  ),
  # LaTeX alternative: type = "latex", out = "pred1_regG5_yearFEs_Nov24.tex"
  type = "html",
  out = "pred1_regG5_yearFEs_Nov24.doc"
)
```


Marginal effects

```{r}
# Standard deviation of the G5 market-liberalization preference measure
# (13.68522 when this was written).
dt_pred[["G5_prefMKT"]] %>% sd(na.rm = TRUE)
# Multiplicative change in the expected number of reforms for a one-SD
# increase: exp(SD x coefficient), with the coefficient 0.018 entered by hand.
exp(13.68522 * 0.018)
```

```{r}
# Interquartile range of the G5 market-liberalization preference measure:
# Q1 = 5, Q3 = 19, difference = 14 (values noted when this was written).
# summary() drops missing values on its own and reports their count, so no
# na.rm argument is needed (it would be silently ignored).
summary(dt_pred$G5_prefMKT)
# Multiplicative change in the expected number of reforms when moving from the
# first to the third quartile: exp(coefficient x IQR), with the coefficient
# 0.018 entered by hand.
exp(0.018 * 14)
```


### Table 5: Conditionality and preferences for market liberalization: Regression analysis

With year and disc_country FEs

```{r}
# Five quasi-Poisson specifications of the number of market-liberalizing
# reforms (BA1MKT) on G5 market-liberalization preferences, with year and
# country-under-discussion fixed effects. Standard errors are the square
# roots of the diagonal of a CR2 cluster-robust covariance matrix, clustered
# on disc_ccode.

# (1) Baseline covariates.
mkt_poi1 <- glm(
  BA1MKT ~ G5_prefMKT + lngdp_WDI + elec_VDem + libdem_VDem +
    gdpgrowth_WDI + cabal_WEO + L1.cumIMF55 + regqual_WGI +
    year_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_pred
)
mkt_rse1 <- mkt_poi1 %>%
  vcovCR(cluster = dt_pred$disc_ccode, type = "CR2") %>%
  diag() %>%
  sqrt()

# (2) Adds the G5 word count: G5_ntoken.
mkt_poi2 <- glm(
  BA1MKT ~ G5_prefMKT + lngdp_WDI + elec_VDem + libdem_VDem +
    gdpgrowth_WDI + cabal_WEO + L1.cumIMF55 + regqual_WGI +
    G5_ntoken +
    year_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_pred
)
mkt_rse2 <- mkt_poi2 %>%
  vcovCR(cluster = dt_pred$disc_ccode, type = "CR2") %>%
  diag() %>%
  sqrt()

# (3) Adds UN Security Council membership: UNSC_member.
mkt_poi3 <- glm(
  BA1MKT ~ G5_prefMKT + lngdp_WDI + elec_VDem + libdem_VDem +
    gdpgrowth_WDI + cabal_WEO + L1.cumIMF55 + regqual_WGI +
    UNSC_member +
    year_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_pred
)
mkt_rse3 <- mkt_poi3 %>%
  vcovCR(cluster = dt_pred$disc_ccode, type = "CR2") %>%
  diag() %>%
  sqrt()

# (4) Adds reserves and debt service: res_import_WDI, debtservice_WDI.
mkt_poi4 <- glm(
  BA1MKT ~ G5_prefMKT + lngdp_WDI + elec_VDem + libdem_VDem +
    gdpgrowth_WDI + cabal_WEO + L1.cumIMF55 + regqual_WGI +
    res_import_WDI + debtservice_WDI +
    year_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_pred
)
mkt_rse4 <- mkt_poi4 %>%
  vcovCR(cluster = dt_pred$disc_ccode, type = "CR2") %>%
  diag() %>%
  sqrt()

# (5) Adds trade openness and economic freedom: tradegdp_WDI, fi_index_pd.
mkt_poi5 <- glm(
  BA1MKT ~ G5_prefMKT + lngdp_WDI + elec_VDem + libdem_VDem +
    gdpgrowth_WDI + cabal_WEO + L1.cumIMF55 + regqual_WGI +
    tradegdp_WDI + fi_index_pd +
    year_FE + disc_FE,
  family = quasipoisson(link = "log"),
  data = dt_pred
)
mkt_rse5 <- mkt_poi5 %>%
  vcovCR(cluster = dt_pred$disc_ccode, type = "CR2") %>%
  diag() %>%
  sqrt()
```

```{r}
# Render the five conditionality models with year and country fixed effects
# as one HTML table written to a .doc file. Fixed-effect dummies are omitted
# and the clustered standard errors replace the default ones.
stargazer(
  mkt_poi1, mkt_poi2, mkt_poi3, mkt_poi4, mkt_poi5,
  dep.var.labels = "Market-liberalization reforms",
  covariate.labels = c(
    "MKT preferences (t-1)", "GDP (log)", "Elections", "Liberal democracy",
    "GDP growth", "Current account balance (% of GDP)", "Past IMF programs",
    "Regulatory quality", "Words spoken (G5)", "UNSC membership",
    "Reserves (% imports)", "Debt service", "Trade (% GDP)",
    "Economic Freedom Index", "Constant"
  ),
  omit = c("*year_FE*", "*disc_FE*"),
  se = list(mkt_rse1, mkt_rse2, mkt_rse3, mkt_rse4, mkt_rse5),
  title = "",
  font.size = "small",
  # Footer rows describing the fixed effects for the five columns.
  add.lines = list(
    c("Country FEs", rep("Yes", 5)),
    c("Year FEs", rep("Yes", 5))
  ),
  # LaTeX alternative: type = "latex", out = "pred2_regG5_fullFEs_Nov24.tex"
  type = "html",
  out = "pred2_regG5_fullFEs_Nov24.doc"
)
```


Marginal effects

```{r}
# Standard deviation of the G5 market-liberalization preference measure
# (13.68522 when this was written).
dt_pred[["G5_prefMKT"]] %>% sd(na.rm = TRUE)
# Multiplicative change in the expected number of reforms for a one-SD
# increase: exp(SD x coefficient), with the coefficient 0.013 entered by hand.
exp(13.68522 * 0.013)
```





